*Data management

*USING STATA MP 16

*********************************************************************************************************
*1.	Append all the Orbis datasets into one file (“append” in Stata)
*********************************************************************************************************

*********************************************************************************************************
*2.	Replace “n.a.” and "n.s." observations with missing, “.”, for all variables
*********************************************************************************************************

*********************************************************************************************************
*3.	Reshape into owner-country-year format
*********************************************************************************************************

*********************************************************************************************************
*4. Use kountry to obtain country identifier in iso3n format
*********************************************************************************************************

*********************************************************************************************************
*5. Clean data and create main variables
*********************************************************************************************************

*generate owner id
by country_firm (id), sort: gen owner_id = _n

*firm-owner-country ID
gen double foc = FirmNumber*100000000 + iso3n*100000 + owner_id
xtset foc year

*firm-country ID
gen double firm_iso3n_id = FirmNumber*1000+iso3n

*The “Shareholder – Direct % 12” variable takes on both numeric and string values
*Replace the string values with numeric values, following Orbis documentation where available:
gen ShareholderDirect_original = ShareholderDirect12

replace ShareholderDirect12 = "100" if ShareholderDirect12=="T"
replace ShareholderDirect12 = "100" if ShareholderDirect12=="WO"
replace ShareholderDirect12 = "0.01" if ShareholderDirect12=="NG"
replace ShareholderDirect12 = "51" if ShareholderDirect12=="MO"
replace ShareholderDirect12 = "100" if ShareholderDirect12=="FC"
replace ShareholderDirect12 = "100" if ShareholderDirect12=="VE"
replace ShareholderDirect12 = "100" if ShareholderDirect12=="BR"
replace ShareholderDirect12 = "" if ShareholderDirect12=="No data fulfill your filter criteria"
replace ShareholderDirect12 = "51" if ShareholderDirect12=="CQP1"
replace ShareholderDirect12 = "0.01" if ShareholderDirect12=="ADV"

split ShareholderDirect12, p("±")
replace ShareholderDirect12 = ShareholderDirect121
replace ShareholderDirect12 = ShareholderDirect122 if ShareholderDirect121==""
drop ShareholderDirect121 ShareholderDirect122

split ShareholderDirect12, p(">")
replace ShareholderDirect12 = ShareholderDirect121
destring ShareholderDirect122, replace 
replace ShareholderDirect122 = ShareholderDirect122+.01 if ShareholderDirect122!=.
tostring ShareholderDirect122, replace force
replace ShareholderDirect12 = ShareholderDirect122 if ShareholderDirect121=="" 
drop ShareholderDirect121 ShareholderDirect122

split ShareholderDirect12, p("<")
replace ShareholderDirect12 = ShareholderDirect121
destring ShareholderDirect122, replace 
replace ShareholderDirect122 = ShareholderDirect122-.01 if ShareholderDirect122!=.
tostring ShareholderDirect122, replace force
replace ShareholderDirect12 = ShareholderDirect122 if ShareholderDirect121=="" 
drop ShareholderDirect121 ShareholderDirect122

replace ShareholderDirect12 = "" if ShareholderDirect12 == "-"
replace ShareholderDirect12 = "" if ShareholderDirect12 == "n.a."
destring ShareholderDirect12, replace

* fill in Shareholders with imputed share if listed as MO for just one year and same in prior and subsequent year
replace ShareholderDirect12 = l.ShareholderDirect12 if l.ShareholderDirect12 == f.ShareholderDirect12 & l.ShareholderDirect12 != . & ShareholderDirect_original == "MO"

* fill in shareholders with imputed share if same right before and right after
replace ShareholderDirect12 = l.ShareholderDirect12 if l.ShareholderDirect12 == f.ShareholderDirect12 & l.ShareholderDirect12 != . & ShareholderDirect12 == .
replace ShareholderDirect12 = l2.ShareholderDirect12 if l2.ShareholderDirect12 == f.ShareholderDirect12 & l2.ShareholderDirect12 != . & ShareholderDirect12 == .
replace ShareholderDirect12 = f2.ShareholderDirect12 if l.ShareholderDirect12 == f2.ShareholderDirect12 & f2.ShareholderDirect12 != . & ShareholderDirect12 == .

*create variable that is greater than 0 for years when we have ownership data
by firm_iso3n_id year, sort: egen max_ownership_share = max(ShareholderDirect12)

* first year with ownership data for each firm
gen t_own = year if max_ownership_share != .
by firm_iso3n_id , sort: egen own_firstyear = min(t_own)
drop t_own

* last year with ownership data for each firm
gen t_own = year if max_ownership_share != .
by firm_iso3n_id , sort: egen own_lastyear = max(t_own)
drop t_own

* drop observations if before first reported ownership year
* drop observations if past last reported ownership year
drop if year < own_firstyear
drop if year > own_lastyear 

	
*********************************************************************************************************
*6a. code majority owners
*********************************************************************************************************
xtset foc year
gen majOwner = 0
replace majOwner = 1 if ShareholderDirect12 >= 50 & ShareholderDirect12 != .
replace majOwner = 1 if l.majOwner == 1 & f.majOwner == 1 & ShareholderDirect12 == . & max_ownership_share < 50
replace majOwner = 1 if l.majOwner == 1 & f.majOwner == 1 & ShareholderDirect12 == . & max_ownership_share == .


*********************************************************************************************************
*6b. code changes to controlling shareholder
*********************************************************************************************************
* controlling shareholder change
* defined as one if current owner wasn't majority owner in past year
* or if current majority owner wasn't majority owner in past year
gen t_controlling_change = 1 if majOwner == 1 & l.majOwner == 0
replace t_controlling_change = 1 if majOwner == 1 & f.majOwner == 0 
by firm_iso3n_id year, sort: egen controlling_change = max(t_controlling_change)
xtset foc year
replace controlling_change = 0 if controlling_change == . & max_ownership_share != . & (l.max_ownership_share != . | f.max_ownership_share != . )
drop t_controlling_change


*********************************************************************************************************
*7. code financial and obfuscated owners
*********************************************************************************************************

* Generate variables identifying financial firms by NAICS description
gen shareholder_finance_dummy = 0
replace shareholder_finance_dummy = 1 if regexm(ShareholderNAICStextdescri, "Accounting, Tax Preparation, Bookkeep")
replace shareholder_finance_dummy = 1 if regexm(ShareholderNAICStextdescri, "Brokerages")
replace shareholder_finance_dummy = 1 if regexm(ShareholderNAICStextdescri, "Financ")
replace shareholder_finance_dummy = 1 if regexm(ShareholderNAICStextdescri, "Securities")
replace shareholder_finance_dummy = 1 if regexm(ShareholderNAICStextdescri, "Credit Intermed")
replace shareholder_finance_dummy = . if ShareholderNAICStextdescri==""

* Generate variables identifying obfuscated owners by those who have no reported last name
replace ShareholderLastname="" if ShareholderLastname=="No data fulfill your filter criteria"
gen shareholder_last_name_dummy = 0
replace shareholder_last_name_dummy = 1 if ShareholderLastname!=""
sum shareholder_last_name_dummy

* Generate variables identifyying obfuscated owners by those who have no LEI
replace ShareholderLegalEntityIdent="" if ShareholderLegalEntityIdent=="No data fulfill your filter criteria"
gen shareholder_legal_entity_dummy = 0
replace shareholder_legal_entity_dummy = 1 if ShareholderLegalEntityIdent!=""
sum shareholder_legal_entity_dummy

*record whether these characteristics apply to the majority owner
gen mo_last_name_dummy = .
replace mo_last_name_dummy = shareholder_last_name_dummy if majOwner == 1
replace mo_last_name_dummy = 0 if majOwner==0

gen mo_LEI_dummy = .
replace mo_LEI_dummy = shareholder_legal_entity_dummy if majOwner == 1
replace mo_LEI_dummy = 0 if majOwner==0

gen mo_finance_dummy = . 
replace mo_finance_dummy = shareholder_finance_dummy if majOwner==1
replace mo_finance_dummy = 0 if majOwner==0

by firm_iso3n_id year, sort: egen mo_last_name = max(mo_last_name_dummy) 
by firm_iso3n_id year, sort: egen mo_LEI = max(mo_LEI_dummy) 
by firm_iso3n_id year, sort: egen mo_finance = max(mo_finance_dummy) 
by firm_iso3n_id year, sort: egen majorityowner = max(majOwner) 

*********************************************************************************************************
*8. keep only firm-level variables and collapse data to firm-year
*********************************************************************************************************

drop id foc owner_id ShareholderDirect12 ShareholderDirect_original majOwner
duplicates drop

xtset firm_iso3n_id year

*********************************************************************************************************
*9. merge with the country-year replication data 
*********************************************************************************************************
